/* ========================================================= */
/*        PROPENSITY SCORE MATCHING - REJECTED VS ACCEPTED  */
/*                   (forinvestment == 0)                   */
/* ========================================================= */

/* Start from a clean session (no data is imported here; each section below
   loads its own dataset with -use-) */
clear all

************************************************************************************
*
*	DESCRIPTION: This script applies matching techniques to the dataset.
*                The package "psmatch2" (ssc install psmatch2) is required.
*                The forinvestment == 1 section also calls "distinct", a
*                user-written command (presumably: ssc install distinct).
*                The globals $output and $input are used in file paths below
*                but are not defined in this script; they are expected to be
*                set before it runs.
*
************************************************************************************

/* ========================================================= */
/*                  DEFINE MATCHING VARIABLES                */
/* ========================================================= */

/* Time-invariant matching variables: full covariate list, including rating
   and the m_* missing-value indicators created in each section */
global xlist "i.outloanyear i.size i.codummy ageatevent i.rating ///
              r3filled i.m_r3filled r13filled i.m_r13filled ///
              i.m_r_collateral_filled r_collateral_filled ///
              logassets_filled i.m_logassets_filled" 

/* Matching excluding rating: same list as xlist without i.rating */
global xlistworating "i.outloanyear i.size i.codummy ageatevent ///
                      r3filled i.m_r3filled r13filled i.m_r13filled ///
                      i.m_r_collateral_filled r_collateral_filled ///
                      logassets_filled i.m_logassets_filled" 

/* Matching excluding missing values: no rating and no m_* indicators.
   NOTE(review): the name contains an underscore (xlist_nomissing); any use
   must spell it exactly, because an undefined global expands to nothing
   without raising an error. */
global xlist_nomissing "i.outloanyear i.size i.codummy ageatevent ///
                        r3filled r13filled r_collateral_filled logassets_filled" 

/* Time-variant matching variables: reshaped wide below into loan0-loan6 */
global ylist "loan"    

/* Industry-level matching: psmatch2 is run separately within each group
   of this variable */
global celllist "industry" 

/* ========================================================= */
/*                    LOAD & PREPARE DATA                    */
/* ========================================================= */
/* Load the panel used by every section of this script */
use "$output\rejected_agg_industries_modified.dta", clear

/* Sample window: months strictly after Dec 2013 and strictly before Jan 2020 */
keep if mdate > mofd(date("20131231","YMD")) & mdate < mofd(date("20200101","YMD"))

/* Non-investment sample: the listed loan types, plus all rows with accepted == 0 */
keep if inlist(loantype, "AU", "CL", "CO", "EX", "ST") | accepted == 0

/* Total loan amount, then log(1 + x) applied in place */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* For each variable: m_<var> flags missing values, which are then set to 0 */
local fillvars r_collateral_filled r13filled logassets_filled r3filled r5filled
foreach v of local fillvars {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* ========================================================= */
/*               CONSTANT CHARACTERISTICS                   */
/* ========================================================= */
/* codummy = 1 when the firm's co, summed over rows with delta < 0, is positive */
preserve
    drop if delta >= 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    drop co
    save "$output\psm_co_rejected_dummy.dta", replace
restore

/* Firm characteristics taken at delta == -11 (one row per siren), with
   codummy attached; only firms present in both files are kept */
preserve
    keep if delta == -11
    keep siren outloanyear size rating industry ageatevent outsideloan ///
         r3filled m_r3filled r5filled m_r5filled r13filled m_r13filled ///
         r_collateral_filled m_r_collateral_filled ///
         logassets_filled m_logassets_filled
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_rejected_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_rejected_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Keep rows with delta between -6 and 0 and shift the index to delta2 = delta + 6 */
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

/* Only the identifiers, the cell variable(s) and the outcome are reshaped */
keep siren accepted delta2 $celllist $ylist

/* Wide layout: one column of $ylist per value of delta2 */
quietly reshape wide $ylist, i(siren accepted $celllist) j(delta2)

/* Force one row per firm, then attach the delta == -11 characteristics */
duplicates drop siren, force
merge 1:m siren using "$output\psm_stable_rejected_keep.dta", keep(match) nogenerate

/* ========================================================= */
/*            K-NEAREST NEIGHBOR PROPENSITY SCORE MATCHING   */
/* ========================================================= */

/* Remove any leftover _* variables (capture: there may be none) */
capture drop _*

/* Permanent holders for the per-cell psmatch2 results */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

/* Matching is run separately within each cell defined by $celllist */
egen cell = group($celllist)
qui levels cell, local(gr)
foreach i of local gr {
    /* Treatment: accepted. Covariates: loan0-loan5 and $xlist.
       Outcome: loan6. Five nearest neighbors, common support imposed. */
    capture psmatch2 accepted loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
        if cell == `i', out(loan6) neighbor(5) common
    /* Keep the return code so a failure can be reported with its real cause */
    local psm_rc = _rc

    if `psm_rc' == 0 {
        display "Processing cell `i'"
        /* Copy this cell's results out of psmatch2's _* variables */
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }

        /* Visualize the matching outcome */
        capture psgraph, name(cell`i',replace) bin(30)
        /* noisily: a plain capture would suppress the balance table entirely */
        capture noisily pstest loan0 loan1 loan2 $xlist
    }
    else {
        /* Not every failure is a small cell (e.g. a missing variable), so show rc */
        display "Skipping cell `i': psmatch2 failed with rc = `psm_rc' (often insufficient observations)"
    }
}

/* ========================================================= */
/*                SAVE MATCHED SAMPLE                        */
/* ========================================================= */
/* Keep the matching results, restricted to firms that received a weight */
keep siren pscore support weight id n1-n5 nn
drop if weight == .

/* Inspect the stored results, then bring back all panel rows of these firms */
summarize pscore support weight id n1 nn, detail
merge 1:m siren using "$output\rejected_agg_industries_modified.dta", keep(match) nogenerate

save "$output\rejected_psm_forinvestment0.dta", replace

	
/* ========================================================= */
/*  PROPENSITY SCORE MATCHING - REJECTED VS ACCEPTED         */
/*                   (forinvestment == 1)                   */
/* ========================================================= */

use "$output\rejected_agg_industries_modified.dta", clear

/* Sample window: both bounds are exclusive */
local win_lo = mofd(date("20131231","YMD"))
local win_hi = mofd(date("20200101","YMD"))
keep if mdate > `win_lo' & mdate < `win_hi'

/* Investment sample: loan types MA and IM, plus all rows with accepted == 0 */
keep if inlist(loantype, "MA", "IM") | accepted == 0

/* Total loan amount, then log(1 + x) applied in place */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* Missing-value indicators m_<var>; the missing values themselves become 0 */
local fillvars r_collateral_filled r13filled logassets_filled r3filled r5filled
foreach v of local fillvars {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* ========================================================= */
/*              CONSTANT CHARACTERISTICS                     */
/* ========================================================= */
/* Firm-level codummy: 1 if co summed over rows with delta < 0 is positive */
preserve
    drop if delta >= 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    drop co
    save "$output\psm_co_rejected_dummy.dta", replace
restore

/* One row per firm at delta == -11, merged with codummy (matches only) */
preserve
    keep if delta == -11
    keep siren outloanyear size rating industry ageatevent outsideloan ///
         r3filled m_r3filled r5filled m_r5filled r13filled m_r13filled ///
         r_collateral_filled m_r_collateral_filled ///
         logassets_filled m_logassets_filled
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_rejected_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_rejected_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Keep rows with delta between -6 and 0; delta2 = delta + 6 indexes them */
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren accepted delta2 $celllist $ylist

/* Diagnostic: number of rows per (siren, delta2) pair */
sort siren delta2
by siren delta2: generate nb = _N
summarize nb, detail
drop nb

/* Wide layout: one column of $ylist per value of delta2 */
quietly reshape wide $ylist, i(siren accepted $celllist) j(delta2)

/* Diagnostic: number of distinct firms in each treatment arm */
distinct siren if accepted == 1
distinct siren if accepted == 0

/* Diagnostic: number of rows per firm after the reshape */
sort siren
by siren: generate nb = _N
summarize nb, detail
drop nb

/* Force one row per firm, then attach the delta == -11 characteristics */
duplicates drop siren, force
merge 1:m siren using "$output\psm_stable_rejected_keep.dta", keep(match) nogenerate

/* ========================================================= */
/*            K-NEAREST NEIGHBOR PROPENSITY SCORE MATCHING   */
/* ========================================================= */

/* Remove any leftover _* variables (capture: there may be none) */
capture drop _*

/* Permanent holders for the per-cell psmatch2 results */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

/* Matching is run separately within each cell defined by $celllist */
egen cell = group($celllist)
qui levels cell, local(gr)
foreach i of local gr {
    /* Treatment: accepted. Covariates: loan0-loan5 and $xlist.
       Outcome: loan6. Five nearest neighbors, common support imposed. */
    capture psmatch2 accepted loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
        if cell == `i', out(loan6) neighbor(5) common
    /* Keep the return code so a failure can be reported with its real cause */
    local psm_rc = _rc

    if `psm_rc' == 0 {
        display "Processing cell `i'"
        /* Copy this cell's results out of psmatch2's _* variables */
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }

        /* Visualize the matching outcome */
        capture psgraph, name(cell`i',replace) bin(30)
        /* noisily: a plain capture would suppress the balance table entirely */
        capture noisily pstest loan0 loan1 loan2 $xlist
    }
    else {
        /* Not every failure is a small cell (e.g. a missing variable), so show rc */
        display "Skipping cell `i': psmatch2 failed with rc = `psm_rc' (often insufficient observations)"
    }
}

/* ========================================================= */
/*                SAVE MATCHED SAMPLE                        */
/* ========================================================= */
/* Matching results only, for firms that received a weight */
keep siren pscore support weight id n1-n5 nn
drop if weight == .

/* Summary of the stored results, then re-attach the full panel */
summarize pscore support weight id n1 nn, detail
merge 1:m siren using "$output\rejected_agg_industries_modified.dta", keep(match) nogenerate

save "$output\rejected_psm_forinvestment1.dta", replace

/* ========================================================= */
/* 		PROPENSITY SCORE MATCHING - REJECTED VS ACCEPTED	*/
/*        UNRATED FIRMS         							*/
/* ========================================================= */

use "$output\rejected_agg_industries_modified.dta", clear

/* Sample window: both bounds are exclusive */
keep if mdate > mofd(date("20131231","YMD")) & mdate < mofd(date("20200101","YMD"))

/* String copy of rating */
tostring rating, gen(ratingstr) force

/* rating_pre: within each firm, the last value of temp in sort order, where
   temp is ratingstr on rows with deltamonths == -1 and "" elsewhere.
   NOTE(review): this uses deltamonths while the rest of the script uses
   delta -- confirm both index the same event time. */
generate temp = ratingstr if deltamonths == -1
bysort siren (temp): generate rating_pre = temp[_N]

/* Unrated sample: rating_pre is "1" or empty.
   NOTE(review): tostring presumably writes a numeric missing rating as ".",
   which this condition does not keep -- confirm that is intended. */
keep if inlist(rating_pre, "1", "")

/* Total loan amount, then log(1 + x) applied in place */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* ========================================================= */
/*               CREATE DUMMY VARIABLES FOR MISSING VALUES   */
/* ========================================================= */
local fillvars r_collateral_filled r13filled logassets_filled r3filled r5filled
foreach v of local fillvars {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* ========================================================= */
/*               GENERATE CODUMMY VARIABLE                   */
/* ========================================================= */
/* codummy = 1 when co, summed over the firm's rows with delta < 0, is positive */
preserve
    drop if delta >= 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    drop co
    save "$output\psm_co_rejected_dummy.dta", replace
restore

/* Characteristics at delta == -11, one row per firm, with codummy attached */
preserve
    keep if delta == -11
    keep siren outloanyear size rating industry ageatevent outsideloan ///
         r3filled m_r3filled r5filled m_r5filled r13filled m_r13filled ///
         r_collateral_filled m_r_collateral_filled ///
         logassets_filled m_logassets_filled
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_rejected_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_rejected_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Keep rows with delta between -6 and 0; delta2 = delta + 6 indexes them */
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren accepted delta2 $celllist $ylist

/* Wide layout: one column of $ylist per value of delta2 */
quietly reshape wide $ylist, i(siren accepted $celllist) j(delta2)

/* Force one row per firm, then attach the delta == -11 characteristics */
duplicates drop siren, force
merge 1:m siren using "$output\psm_stable_rejected_keep.dta", keep(match) nogenerate

/* ========================================================= */
/*            K-NEAREST NEIGHBOR PROPENSITY SCORE MATCHING   */
/* ========================================================= */

/* Remove any leftover _* variables (capture: there may be none) */
capture drop _*

/* Permanent holders for the per-cell psmatch2 results */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

/* Matching is run separately within each cell defined by $celllist */
egen cell = group($celllist)
qui levels cell, local(gr)
foreach i of local gr {
    /* Treatment: accepted. Covariates: loan0-loan5 and $xlist.
       Outcome: loan6. Five nearest neighbors, common support imposed. */
    capture psmatch2 accepted loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
        if cell == `i', out(loan6) neighbor(5) common
    /* Keep the return code so a failure can be reported with its real cause */
    local psm_rc = _rc

    if `psm_rc' == 0 {
        display "Processing cell `i'"
        /* Copy this cell's results out of psmatch2's _* variables */
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }

        /* Visualize the matching outcome */
        capture psgraph, name(cell`i',replace) bin(30)
        /* noisily: a plain capture would suppress the balance table entirely */
        capture noisily pstest loan0 loan1 loan2 $xlist
    }
    else {
        /* Not every failure is a small cell (e.g. a missing variable), so show rc */
        display "Skipping cell `i': psmatch2 failed with rc = `psm_rc' (often insufficient observations)"
    }
}

/* ========================================================= */
/*                SAVE MATCHED SAMPLE                        */
/* ========================================================= */

/* Matching results only, for firms that received a weight */
keep siren pscore support weight id n1-n5 nn
drop if weight == .

/* Summary of the stored results, then re-attach the full panel */
summarize pscore support weight id n1 nn, detail
merge 1:m siren using "$output\rejected_agg_industries_modified.dta", keep(match) nogenerate

save "$output\rejected_psm_unrated.dta", replace
	
/* ========================================================= */
/* 					REJECTED VS ACCEPTED					*/
/*        PROPENSITY SCORE MATCHING - RATED FIRMS           */
/* ========================================================= */

use "$output\rejected_agg_industries_modified.dta", clear

/* Sample window: both bounds are exclusive */
local win_lo = mofd(date("20131231","YMD"))
local win_hi = mofd(date("20200101","YMD"))
keep if mdate > `win_lo' & mdate < `win_hi'

/* String copy of rating */
tostring rating, gen(ratingstr) force

/* rating_pre: within each firm, the last value of temp in sort order, where
   temp is ratingstr on rows with deltamonths == -1 and "" elsewhere.
   NOTE(review): deltamonths vs delta -- confirm both index the same event time. */
generate temp = ratingstr if deltamonths == -1
bysort siren (temp): generate rating_pre = temp[_N]
drop temp

/* Rated sample: drop firms whose rating_pre is "1", "10", "11", "12" or "13".
   NOTE(review): an empty rating_pre is not in that list, so such firms stay
   in this sample -- confirm that is intended. */
drop if inlist(rating_pre, "1", "10", "11", "12", "13")

/* Total loan amount, then log(1 + x) applied in place */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* ========================================================= */
/*               CREATE DUMMY VARIABLES FOR MISSING VALUES   */
/* ========================================================= */
local fillvars r_collateral_filled r13filled logassets_filled r3filled r5filled
foreach v of local fillvars {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* ========================================================= */
/*               GENERATE CODUMMY VARIABLE                   */
/* ========================================================= */
/* Firm-level codummy: 1 if co summed over rows with delta < 0 is positive */
preserve
    drop if delta >= 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    drop co
    save "$output\psm_co_rejected_dummy.dta", replace
restore

/* One row per firm at delta == -11, merged with codummy (matches only) */
preserve
    keep if delta == -11
    keep siren outloanyear size rating industry ageatevent outsideloan ///
         r3filled m_r3filled r5filled m_r5filled r13filled m_r13filled ///
         r_collateral_filled m_r_collateral_filled ///
         logassets_filled m_logassets_filled
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_rejected_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_rejected_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Keep rows with delta between -6 and 0; delta2 = delta + 6 indexes them */
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren accepted delta2 $celllist $ylist

/* Wide layout: one column of $ylist per value of delta2 */
quietly reshape wide $ylist, i(siren accepted $celllist) j(delta2)

/* Force one row per firm, then attach the delta == -11 characteristics */
duplicates drop siren, force
merge 1:m siren using "$output\psm_stable_rejected_keep.dta", keep(match) nogenerate

/* ========================================================= */
/*            K-NEAREST NEIGHBOR PROPENSITY SCORE MATCHING   */
/* ========================================================= */

/* Remove any leftover _* variables (capture: there may be none) */
capture drop _*

/* Permanent holders for the per-cell psmatch2 results */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

/* Matching is run separately within each cell defined by $celllist */
egen cell = group($celllist)
qui levels cell, local(gr)
foreach i of local gr {
    /* Treatment: accepted. Covariates: loan0-loan5 and $xlist.
       Outcome: loan6. Five nearest neighbors, common support imposed. */
    capture psmatch2 accepted loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
        if cell == `i', out(loan6) neighbor(5) common
    /* Keep the return code so a failure can be reported with its real cause */
    local psm_rc = _rc

    if `psm_rc' == 0 {
        display "Processing cell `i'"
        /* Copy this cell's results out of psmatch2's _* variables */
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }

        /* Visualize the matching outcome */
        capture psgraph, name(cell`i',replace) bin(30)
        /* noisily: a plain capture would suppress the balance table entirely */
        capture noisily pstest loan0 loan1 loan2 $xlist
    }
    else {
        /* Not every failure is a small cell (e.g. a missing variable), so show rc */
        display "Skipping cell `i': psmatch2 failed with rc = `psm_rc' (often insufficient observations)"
    }
}

/* ========================================================= */
/*                SAVE MATCHED SAMPLE                        */
/* ========================================================= */

/* Matching results only, for firms that received a weight */
keep siren pscore support weight id n1-n5 nn
drop if weight == .

/* Re-attach all panel rows of the matched firms */
merge 1:m siren using "$output\rejected_agg_industries_modified.dta", keep(match) nogenerate


save "$output\rejected_psm_rated.dta", replace

/* ========================================================= */
/*        PROPENSITY SCORE MATCHING 				         */
/* 					REJECTED VS ACCEPTED					*/
/*                        (TABLE 6)                         */
/* ========================================================= */

use "$output\rejected_agg_industries_modified.dta", clear

/* Sample window: both bounds are exclusive */
keep if mdate > mofd(date("20131231","YMD")) & mdate < mofd(date("20200101","YMD"))

/* Restrict to firms present in the Table 6 accepted-firm list */
merge m:1 siren using "$input\liste_table6_accepted.dta", keep(match) nogenerate

/* Total loan amount, then log(1 + x) applied in place */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* Missing-value indicators m_<var>; the missing values themselves become 0 */
local fillvars r_collateral_filled r13filled logassets_filled r3filled r5filled
foreach v of local fillvars {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* ========================================================= */
/*               GENERATE CODUMMY VARIABLE                   */
/* ========================================================= */
/* codummy = 1 when co, summed over the firm's rows with delta < 0, is positive */
preserve
    drop if delta >= 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    drop co
    save "$output\psm_co_rejected_dummy.dta", replace
restore

/* Characteristics at delta == -11 (rating is not kept in this section),
   one row per firm, with codummy attached */
preserve
    keep if delta == -11
    keep siren outloanyear size industry ageatevent outsideloan ///
         r3filled m_r3filled r5filled m_r5filled r13filled m_r13filled ///
         r_collateral_filled m_r_collateral_filled ///
         logassets_filled m_logassets_filled
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_rejected_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_rejected_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Keep rows with delta between -6 and 0; delta2 = delta + 6 indexes them */
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren accepted delta2 $celllist $ylist

/* Wide layout: one column of $ylist per value of delta2 */
quietly reshape wide $ylist, i(siren accepted $celllist) j(delta2)

/* Force one row per firm, then attach the delta == -11 characteristics */
duplicates drop siren, force
merge 1:m siren using "$output\psm_stable_rejected_keep.dta", keep(match) nogenerate

/* ========================================================= */
/*            K-NEAREST NEIGHBOR PROPENSITY SCORE MATCHING   */
/* ========================================================= */

/* Remove any leftover _* variables (capture: there may be none) */
capture drop _*

/* Permanent holders for the per-cell psmatch2 results */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

* We don't match on rating -> too restrictive given the sample size
egen cell = group($celllist)
qui levels cell, local(gr)
foreach i of local gr {
    /* Treatment: accepted. Covariates: loan0-loan5 and $xlistworating.
       Outcome: loan6. Five nearest neighbors, common support imposed. */
    capture psmatch2 accepted loan0 loan1 loan2 loan3 loan4 loan5 $xlistworating ///
        if cell == `i', out(loan6) neighbor(5) common
    /* Keep the return code so a failure can be reported with its real cause */
    local psm_rc = _rc

    if `psm_rc' == 0 {
        display "Processing cell `i'"
        /* Copy this cell's results out of psmatch2's _* variables */
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }

        /* Visualize the matching outcome */
        capture psgraph, name(cell`i',replace) bin(30)
        /* noisily: a plain capture would suppress the balance table entirely */
        capture noisily pstest loan0 loan1 loan2 $xlistworating
    }
    else {
        /* Not every failure is a small cell (e.g. a missing variable), so show rc */
        display "Skipping cell `i': psmatch2 failed with rc = `psm_rc' (often insufficient observations)"
    }
}

/* ========================================================= */
/*                SAVE MATCHED SAMPLE                        */
/* ========================================================= */

/* Matching results only, for firms that received a weight */
keep siren pscore support weight id n1-n5 nn
drop if weight == .

/* Summary of the stored results, then re-attach the full panel */
summarize pscore support weight id n1 nn, detail
merge 1:m siren using "$output\rejected_agg_industries_modified.dta", keep(match) nogenerate

save "$output\matched_sample_rejected_table6.dta", replace

/* ============================================================================= */
/* 					REJECTED VS ACCEPTED										*/
/*                       TABLE 6 - Investment (PSM Matching)                      */
/* ============================================================================= */

* ------------------------------------------------------------------------------
* Step 1: Load and filter dataset
* ------------------------------------------------------------------------------

use "$output\rejected_agg_industries_modified.dta", clear

* Sample window: both bounds are exclusive
keep if mdate > mofd(date("20131231","YMD")) & mdate < mofd(date("20200101","YMD"))

* Restrict to firms present in the Table 6 accepted-firm list
merge m:1 siren using "$input\liste_table6_accepted.dta", keep(match) nogenerate

* Investment sample: all rows with accepted == 0, plus loan types MA and IM
keep if accepted == 0 | inlist(loantype, "MA", "IM")

* Total loan amount, then log(1 + x) applied in place
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

* ------------------------------------------------------------------------------
* Step 2: Handle missing values in key financial variables
* ------------------------------------------------------------------------------

* logassets_filled is included here and in the keep list below because
* $xlistworating uses logassets_filled and i.m_logassets_filled: without
* them every captured psmatch2 call fails and the matched sample is empty.
foreach var of varlist r3filled r13filled r5filled r_collateral_filled logassets_filled {
    * Create a dummy for missing values, then set the missing values to zero
    gen m_`var' = `var' == .
    replace `var' = 0 if `var' == .
}

* ------------------------------------------------------------------------------
* Step 3: Save auxiliary data for PSM
* ------------------------------------------------------------------------------

* codummy = 1 when co, summed over the firm's rows with delta < 0, is positive
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    gen codummy = co > 0
    drop co
    save "$output\psm_co_dummy.dta", replace
restore

* Save firm characteristics at delta = -11 (one row per firm) with codummy
preserve 
    keep if delta == -11
    keep siren outloanyear size r5filled m_r5filled industry ageatevent ///
         r3filled m_r3filled r13filled m_r13filled ///
         r_collateral_filled m_r_collateral_filled ///
         logassets_filled m_logassets_filled outsideloan
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta"
    keep if _merge == 3
    drop _merge
    save "$output\psm_stable_keep.dta", replace    
restore

* ------------------------------------------------------------------------------
* Step 4: Reshape data for PSM
* ------------------------------------------------------------------------------

* Keep rows with delta between -6 and 0; delta2 = delta + 6 indexes them
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta

* One row per (siren, delta2); only identifiers, cell variable(s) and outcome stay
duplicates drop siren delta2, force
keep siren accepted delta2 $celllist $ylist 
sort siren delta2

* Wide layout: one column of $ylist per value of delta2
quietly reshape wide $ylist, i(siren accepted $celllist) j(delta2)

* Force one row per firm
duplicates drop siren, force

* Attach the delta == -11 characteristics (matches only)
merge 1:m siren using "$output\psm_stable_keep.dta", keep(match) nogenerate

* Replace industry by a numeric group code built from it
egen indg = group(industry)
drop industry
rename indg industry

* ------------------------------------------------------------------------------
* Step 5: Propensity Score Matching (PSM) using k-nearest neighbor
* ------------------------------------------------------------------------------

* Remove any leftover _* variables (capture: there may be none)
capture drop _*

* Create placeholders for matching results
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn  {
    gen `name' = .
}

* Create group identifier for matching
egen cell = group($celllist)
qui levels cell, local(gr)

foreach i of local gr {

    * Treatment: accepted. Covariates: loan0-loan5 and $xlistworating.
    * Outcome: loan6. Five nearest neighbors, common support imposed.
    capture psmatch2 accepted loan0 loan1 loan2 loan3 loan4 loan5 $xlistworating ///
        if cell == `i', out(loan6) neighbor(5) common
    * Keep the return code so a failure can be reported with its real cause
    local psm_rc = _rc

    if `psm_rc' == 0 {
        display "Processing matching for cell `i'..."

        * Copy this cell's results out of psmatch2's _* variables
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }
    }
    else {
        * Not every failure is a small cell (e.g. a missing variable), so show rc
        display "Skipping cell `i': psmatch2 failed with rc = `psm_rc' (often insufficient observations)"
    }
}

* ------------------------------------------------------------------------------
* Step 6: Save matched sample for further analysis
* ------------------------------------------------------------------------------

* Retain only relevant variables for matched sample
* Matching results only, for firms that received a weight
keep siren pscore support weight id n1-n5 nn
drop if weight == .

* Attach the panel rows of the matched firms.
* NOTE(review): this reads ifp_scrapped_aggregated_industries_modified.dta,
* whereas this section was loaded from rejected_agg_industries_modified.dta
* -- confirm this is the intended file.
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta", keep(match) nogenerate

* Write the matched dataset
save "$output\matched_sample_accepted_table6_FI.dta", replace

/* ============================================================================= */
/* 					REJECTED VS ACCEPTED										*/
/*                 TABLE 6 - No Investment (PSM Matching)                         */
/* ============================================================================= */

* ------------------------------------------------------------------------------
* Step 1: Load and filter dataset
* ------------------------------------------------------------------------------

use "$output\rejected_agg_industries_modified.dta", clear

* Sample window: both bounds are exclusive
local win_lo = mofd(date("20131231","YMD"))
local win_hi = mofd(date("20200101","YMD"))
keep if mdate > `win_lo' & mdate < `win_hi'

* Restrict to firms present in the Table 6 accepted-firm list
merge m:1 siren using "$input\liste_table6_accepted.dta", keep(match) nogenerate

* Non-investment sample: all rows with accepted == 0, plus rows whose
* loantype is non-empty and is neither MA nor IM
keep if accepted == 0 | !inlist(loantype, "MA", "IM", "")

* Total loan amount, then log(1 + x) applied in place
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

* ------------------------------------------------------------------------------
* Step 2: Handle missing values in key financial variables
* ------------------------------------------------------------------------------

* logassets_filled is included here and in the keep list below because
* $xlistworating uses logassets_filled and i.m_logassets_filled: without
* them every captured psmatch2 call fails and the matched sample is empty.
foreach var of varlist r3filled r13filled r5filled r_collateral_filled logassets_filled {
    * Create a dummy for missing values, then set the missing values to zero
    gen m_`var' = `var' == .
    replace `var' = 0 if `var' == .
}

* ------------------------------------------------------------------------------
* Step 3: Save auxiliary data for PSM
* ------------------------------------------------------------------------------

* codummy = 1 when co, summed over the firm's rows with delta < 0, is positive
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    gen codummy = co > 0
    drop co
    save "$output\psm_co_dummy.dta", replace
restore

* Save firm characteristics at delta = -11 (one row per firm) with codummy
preserve 
    keep if delta == -11
    keep siren outloanyear size r5filled m_r5filled industry ageatevent ///
         r3filled m_r3filled r13filled m_r13filled ///
         r_collateral_filled m_r_collateral_filled ///
         logassets_filled m_logassets_filled outsideloan
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta"
    keep if _merge == 3
    drop _merge
    save "$output\psm_stable_keep.dta", replace    
restore

* ------------------------------------------------------------------------------
* Step 4: Reshape data for PSM
* ------------------------------------------------------------------------------

* Keep rows with delta between -6 and 0; delta2 = delta + 6 indexes them
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta

* One row per (siren, delta2); only identifiers, cell variable(s) and outcome stay
duplicates drop siren delta2, force
keep siren accepted delta2 $celllist $ylist 
sort siren delta2

* Wide layout: one column of $ylist per value of delta2
quietly reshape wide $ylist, i(siren accepted $celllist) j(delta2)

* Force one row per firm
duplicates drop siren, force

* Attach the delta == -11 characteristics (matches only)
merge 1:m siren using "$output\psm_stable_keep.dta", keep(match) nogenerate

* Replace industry by a numeric group code built from it
egen indg = group(industry)
drop industry
rename indg industry

* ------------------------------------------------------------------------------
* Step 5: Propensity Score Matching (PSM) using k-nearest neighbor
* ------------------------------------------------------------------------------

* Remove any leftover _* variables (capture: there may be none)
capture drop _*

* Create placeholders for matching results
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

* Create group identifier for matching
egen cell = group($celllist)
qui levels cell, local(gr)

foreach i of local gr {

    * Treatment: accepted. Covariates: loan0-loan5 and $xlistworating.
    * Outcome: loan6. Five nearest neighbors, common support imposed.
    capture psmatch2 accepted loan0 loan1 loan2 loan3 loan4 loan5 $xlistworating ///
        if cell == `i', out(loan6) neighbor(5) common
    * Keep the return code so a failure can be reported with its real cause
    local psm_rc = _rc

    if `psm_rc' == 0 {
        display "Processing matching for cell `i'..."

        * Copy this cell's results out of psmatch2's _* variables
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }
    }
    else {
        * Not every failure is a small cell (e.g. a missing variable), so show rc
        display "Skipping cell `i': psmatch2 failed with rc = `psm_rc' (often insufficient observations)"
    }
}

* ------------------------------------------------------------------------------
* Step 6: Save matched sample for further analysis
* ------------------------------------------------------------------------------

* Retain only relevant variables for matched sample
* Matching results only, for firms that received a weight
keep siren pscore support weight id n1-n5 nn
drop if weight == .

* Summary of the stored results, then attach the panel rows of matched firms.
* NOTE(review): this reads ifp_scrapped_aggregated_industries_modified.dta,
* whereas this section was loaded from rejected_agg_industries_modified.dta
* -- confirm this is the intended file.
summarize pscore support weight id n1 nn, detail
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta", keep(match) nogenerate

* Write the matched dataset
save "$output\matched_sample_accepted_table6_noFI.dta", replace

/* ========================================================= */
/*        PROPENSITY SCORE MATCHING - NO MISSING DATA       */
/* 					REJECTED VS ACCEPTED					*/
/* ========================================================= */

use "$output\rejected_agg_industries_modified.dta", clear

/* Sample window: both bounds are exclusive */
keep if mdate > mofd(date("20131231","YMD")) & mdate < mofd(date("20200101","YMD"))

/* Total loan amount, then log(1 + x) applied in place.
   Unlike the other sections, no missing values are replaced here. */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* ========================================================= */
/*               GENERATE CODUMMY VARIABLE                   */
/* ========================================================= */
/* codummy = 1 when co, summed over the firm's rows with delta < 0, is positive */
preserve
    drop if delta >= 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    drop co
    save "$output\psm_co_dummy.dta", replace
restore

/* Characteristics at delta == -11 (no m_* indicators in this section),
   one row per firm, with codummy attached */
preserve
    keep if delta == -11
    keep siren outloanyear size industry ageatevent outsideloan ///
         r3filled r5filled r13filled r_collateral_filled
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Keep rows with delta between -6 and 0; delta2 = delta + 6 indexes them */
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren accepted delta2 $celllist $ylist

/* Wide layout: one column of $ylist per value of delta2 */
quietly reshape wide $ylist, i(siren accepted $celllist) j(delta2)

/* Force one row per firm, then attach the delta == -11 characteristics */
duplicates drop siren, force
merge 1:m siren using "$output\psm_stable_keep.dta", keep(match) nogenerate

/* ========================================================= */
/*            K-NEAREST NEIGHBOR PROPENSITY SCORE MATCHING   */
/* ========================================================= */

/* Remove any leftover _* variables (capture: there may be none) */
capture drop _*

/* Permanent holders for the per-cell psmatch2 results */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

/* Matching is run separately within each cell defined by $celllist */
egen cell = group($celllist)
qui levels cell, local(gr)
foreach i of local gr {
    /* The covariate global is spelled xlist_nomissing (with underscore).
       The misspelling $xlistnomissing is undefined and expands to nothing,
       which silently removes every covariate except loan0-loan5. */
    capture psmatch2 accepted loan0 loan1 loan2 loan3 loan4 loan5 $xlist_nomissing ///
        if cell == `i', out(loan6) neighbor(5) common
    /* Keep the return code so a failure can be reported with its real cause */
    local psm_rc = _rc

    if `psm_rc' == 0 {
        display "Processing cell `i'"
        /* Copy this cell's results out of psmatch2's _* variables */
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }

        /* Visualize the matching outcome */
        capture psgraph, name(cell`i',replace) bin(30)
        /* noisily: a plain capture would suppress the balance table entirely */
        capture noisily pstest loan0 loan1 loan2 $xlist_nomissing
    }
    else {
        /* Not every failure is a small cell (e.g. a missing variable), so show rc */
        display "Skipping cell `i': psmatch2 failed with rc = `psm_rc' (often insufficient observations)"
    }
}

/* ========================================================= */
/*                SAVE MATCHED SAMPLE                        */
/* ========================================================= */

/* Matching results plus the treatment flag, for firms that received a weight */
keep siren pscore support weight id n1-n5 nn accepted
drop if weight == .

/* Summary of the stored results, then re-attach the full panel */
summarize pscore support weight id n1 nn, detail
merge 1:m siren using "$output\rejected_agg_industries_modified.dta", keep(match) nogenerate

save "$output\matched_sample_accepted_no_missing.dta", replace
